HEAD ======= >>>>>>> e18315de4b052a9ead02773a98c8fae747ea21e1
# cas_dir = "/Volumes/psych-cog/dsnlab/TAG/"
# proj_path = "/Volumes/psych-cog/dsnlab/TAG/projects/timing_tempo_tag/"
# q_path = "/Volumes/psych-cog/dsnlab/TAG/behavior/Questionnaires/"
<<<<<<< HEAD
proj_root = "/Users/clarefmccann/University\ of\ Oregon\ Dropbox/Clare\ McCann/mine/projects/tag-projs/timing_tempo_tag/"
data_root = "/Users/clarefmccann/University\ of\ Oregon\ Dropbox/Clare\ McCann/mine/projects/tag-projs/data-tag/"
## UPDATE FILE NAMES IF NEEDED
pds_file_name = "09.2024_PDS_AllObs_Chronological.csv"
pbip_file_name = "10.2024_PBIP_AllObs_Chronological.csv"
age_file_name = "09.2024_age_long_full.csv"
pub_comp_file_name = "Allwaves_PubertyComposite_2024.05.07.csv"## for puberty self-report variables
### LOAD IN PDS
pds <- read.csv(paste0(data_root, pds_file_name)) %>%
mutate(tagid=as.factor(tagid),
wave=as.factor(wave)) %>%
select(tagid, wave, adrenf2, gonadf2, pdss) %>%
rename("adrenal_pdss" = "adrenf2",
"gonadal_pdss" = "gonadf2")
### LOAD IN PBIP
pbip <- read.csv(paste0(data_root, pbip_file_name)) %>%
mutate(tagid=as.factor(tagid),
wave=as.factor(wave)) %>%
select(tagid, wave, stage, PBIP_1A, PBIP_2A) %>%
rename("adrenal_pbip" = "PBIP_2A",
"gonadal_pbip" = "PBIP_1A")
pub_comp <- read.csv(paste0(data_root, pub_comp_file_name)) %>%
mutate(tagid=as.factor(tagid),
wave=as.factor(wave)) %>%
select(tagid, wave, ADRENcomp, GONADcomp, PUBcomp)
age <- read.csv(paste0(data_root, age_file_name)) %>%
mutate(tagid=paste0("TAG",
substring(tagid,first=5,last=length(tagid)))) %>%
filter(
!wave == "w1s1",
!wave == "w2s1",
!wave == "w3s1",
!wave == "w4s1",
!wave == "w5s1",
!wave == "w6s1"
) %>%
mutate(
wave = ifelse(wave == "w1s2", "1",
ifelse(wave == "w2s2", "2",
ifelse(wave == "w3s2", "3",
ifelse(wave == "w4s2", "4",
ifelse(wave == "w5s2", "5",
ifelse(wave == "w6s2", "6",
wave))))))) %>%
filter(!is.na(age))
puberty_sr <- left_join(pds, pbip, by = c("tagid", "wave"))
puberty_sr <- left_join(puberty_sr, pub_comp, by = c("tagid", "wave"))
puberty_sr <- left_join(puberty_sr, age, by = c("tagid", "wave"))## REMOVE REGRESSING CASES
### CHANGE PUBERTY VAR TO VARIABLE OF INTEREST
# regressing_ids <- puberty_sr %>%
# arrange(tagid, wave) %>%
# group_by(tagid) %>%
# mutate(stage_diff = stage - lag(stage)) %>%
# filter(stage_diff < 0) %>%
# select(id) %>%
# distinct()
#
# puberty_sr <- puberty_sr %>%
# filter(!(tagid %in% regressing_ids$tagid))puberty_sr <- puberty_sr %>%
distinct(tagid, wave, .keep_all = TRUE)
puberty_wide <- puberty_sr %>%
select(tagid, wave, stage) %>%
pivot_wider(names_from = "wave", values_from = "stage") %>%
rename("pub1" = "1",
"pub2" = "2",
"pub3" = "3",
"pub4" = "4",
"pub5" = "5")
age_wide <- puberty_sr %>%
select(tagid, wave, age) %>%
pivot_wider(names_from = "wave", values_from = "age") %>%
rename("age1" = "1",
"age2" = "2",
"age3" = "3",
"age4" = "4",
"age5" = "5")
puberty_wide <- full_join(puberty_wide, age_wide, by = "tagid")
## REMOVE REGRESSING CASES
# regressing_ids <- puberty_sr %>%
# arrange(tagid, wave) %>%
# group_by(tagid) %>%
# mutate(stage_diff = stage - lag(stage)) %>%
# filter(stage_diff < 0) %>%
# select(id) %>%
# distinct()
#
# puberty_sr <- puberty_sr %>%
# filter(!(tagid %in% regressing_ids$tagid))## [1] "mean pdss at each wave"
pdss_means <- puberty_sr %>%
filter(!is.na(pdss)) %>%
group_by(wave) %>%
summarize(mean = mean(pdss))
plot(pdss_means, ylim=c(1,5),
xlab = "wave",
ylab = "PDSS",
main = "plot of means overtime (PDSS)",
pch = 16)## Warning in xy.coords(x, y, xlabel, ylabel, log): NAs introduced by coercion
## [1] "mean stage (PBIP) score at each wave"
stage_means <- puberty_sr %>%
filter(!is.na(stage)) %>%
group_by(wave) %>%
summarize(mean = mean(stage))
plot(stage_means, ylim=c(1,5),
xlab = "wave",
ylab = "stage (pbip)",
main = "plot of means overtime (PBIP - stage)",
pch = 16)## Warning in xy.coords(x, y, xlabel, ylabel, log): NAs introduced by coercion
## [1] "mean PUBcomp score at each wave"
PUBcomp_means <- puberty_sr %>%
filter(!is.na(PUBcomp)) %>%
group_by(wave) %>%
summarize(mean = mean(PUBcomp))
plot(PUBcomp_means, ylim=c(1,5),
xlab = "wave",
ylab = "pubcomp",
main = "plot of means overtime (pubcomp)",
pch = 16)## [1] "mean pdss adrenal score at each wave"
### ADRENAL
pdss_adren_means <- puberty_sr %>%
filter(!is.na(adrenal_pdss)) %>%
group_by(wave) %>%
summarize(mean = mean(adrenal_pdss))
plot(pdss_adren_means, ylim=c(1,5),
xlab = "wave",
ylab = "adrenal pdss",
main = "plot of pdss adrenal means overtime",
pch = 16)## Warning in xy.coords(x, y, xlabel, ylabel, log): NAs introduced by coercion
## [1] "mean pbip adrenal score at each wave"
pbip_adrenal_means <- puberty_sr %>%
filter(!is.na(gonadal_pbip)) %>%
group_by(wave) %>%
summarize(mean = mean(gonadal_pbip))
plot(pbip_adrenal_means, ylim=c(1,5),
xlab = "wave",
ylab = "adrenal pbip",
main = "plot of pbip adrenal means overtime",
pch = 16)## Warning in xy.coords(x, y, xlabel, ylabel, log): NAs introduced by coercion
## [1] "mean pubcomp adrenal score at each wave"
pubcomp_adrenal_means <- puberty_sr %>%
filter(!is.na(ADRENcomp)) %>%
group_by(wave) %>%
summarize(mean = mean(ADRENcomp))
plot(pubcomp_adrenal_means, ylim=c(1,5),
xlab = "wave",
ylab = "pubcomp adrenal",
main = "plot of comp adrenal means overtime",
pch = 16)## [1] "mean pdss gonadal score at each wave"
pdss_gonad_means <- puberty_sr %>%
filter(!is.na(gonadal_pdss)) %>%
group_by(wave) %>%
summarize(mean = mean(gonadal_pdss))
plot(pdss_gonad_means, ylim=c(1,5),
xlab = "wave",
ylab = "gonadal PDSS",
main = "plot of pdss gonadal means overtime",
pch = 16)## Warning in xy.coords(x, y, xlabel, ylabel, log): NAs introduced by coercion
## [1] "mean pbip gonadal score at each wave"
pbip_gonad_means <- puberty_sr %>%
filter(!is.na(gonadal_pbip)) %>%
group_by(wave) %>%
summarize(mean = mean(gonadal_pbip))
plot(pbip_gonad_means, ylim=c(1,5),
xlab = "wave",
ylab = "gonadal pbip",
main = "plot of pbip gonadal means overtime",
pch = 16)## Warning in xy.coords(x, y, xlabel, ylabel, log): NAs introduced by coercion
## [1] "mean comp gonadal score at each wave"
pubcomp_gonad_means <- puberty_sr %>%
filter(!is.na(GONADcomp)) %>%
group_by(wave) %>%
summarize(mean = mean(gonadal_pbip))
plot(pubcomp_gonad_means, ylim=c(1,5),
xlab = "wave",
ylab = "pubcomp gonadal",
main = "plot of pubcomp gonadal means overtime",
pch = 16)adrenal_cor_vars <- puberty_sr[, c("adrenal_pdss", "adrenal_pbip", "ADRENcomp")]
print("printing adrenal cor matrix")## [1] "printing adrenal cor matrix"
## adrenal_pdss adrenal_pbip ADRENcomp
## adrenal_pdss 1.0000000 0.7765446 0.9299664
## adrenal_pbip 0.7765446 1.0000000 0.9291513
## ADRENcomp 0.9299664 0.9291513 1.0000000
gonadal_cor_vars <- puberty_sr[, c("gonadal_pdss", "gonadal_pbip", "GONADcomp")]
print("printing gonadal cor matrix")## [1] "printing gonadal cor matrix"
## gonadal_pdss gonadal_pbip GONADcomp
## gonadal_pdss 1.0000000 0.7040927 0.9144084
## gonadal_pbip 0.7040927 1.0000000 0.9141941
## GONADcomp 0.9144084 0.9141941 1.0000000
## [1] "printing total cor matrix"
## pdss stage PUBcomp
## pdss 1.0000000 0.8348423 0.9455534
## stage 0.8348423 1.0000000 0.9442710
## PUBcomp 0.9455534 0.9442710 1.0000000
## 'data.frame': 877 obs. of 14 variables:
## $ tagid : chr "TAG001" "TAG001" "TAG001" "TAG001" ...
## $ wave : chr "1" "1" "1" "1" ...
## $ adrenal_pdss: int 3 3 3 3 3 5 4 5 2 4 ...
## $ gonadal_pdss: int 2 2 2 2 5 5 5 5 3 2 ...
## $ pdss : num 2.5 2.5 2.5 2.5 4 5 4.5 5 2.5 3 ...
## $ stage : num 2.5 2.5 2.5 2.5 3.5 4 5 4.5 2 3 ...
## $ gonadal_pbip: int 2 2 2 2 4 4 5 4 2 3 ...
## $ adrenal_pbip: int 3 3 3 3 3 4 5 5 2 3 ...
## $ ADRENcomp : num 3 4 4 5 3 4.5 4.5 NA 2 3.5 ...
## $ GONADcomp : num 2 3 3.5 4.5 4.5 4.5 5 NA 2.5 2.5 ...
## $ PUBcomp : num 2.5 3.5 3.75 4.75 3.75 4.5 4.75 NA 2.25 3 ...
## $ X : int 2 2 2 2 4 6 8 10 14 16 ...
## $ age : num 12.3 12.3 12.3 12.3 13.9 ...
## $ date : chr "2016-02-06" "2016-02-06" "2016-02-06" "2016-02-06" ...
puberty_sr$tagid <- as.factor(puberty_sr$tagid)
## means and stuff for stages and things
print("observed first instance for age at TS3 by pdss")## [1] "observed first instance for age at TS3 by pdss"
pdss_avg_age_ts3 <- puberty_sr %>%
filter(pdss == 3) %>%
group_by(tagid) %>%
slice_min(order_by = age, n = 1, with_ties = FALSE) %>%
ungroup() %>%
summarize(average_age = mean(age, na.rm = TRUE),
min_age = min(age, na.rm = TRUE),
max_age = max(age, na.rm = TRUE),
n_participants = n_distinct(tagid))
print(pdss_avg_age_ts3) ## # A tibble: 1 × 4
## average_age min_age max_age n_participants
## <dbl> <dbl> <dbl> <int>
## 1 12.1 10.4 14.2 51
## [1] "observed first instance for age at TS3 by stage (PBIP)"
stage_avg_age_ts3 <- puberty_sr %>%
filter(stage == 3) %>%
group_by(tagid) %>%
slice_min(order_by = age, n = 1, with_ties = FALSE) %>%
ungroup() %>%
summarize(average_age = mean(age, na.rm = TRUE),
min_age = min(age, na.rm = TRUE),
max_age = max(age, na.rm = TRUE),
n_participants = n_distinct(tagid))
print(stage_avg_age_ts3)## # A tibble: 1 × 4
## average_age min_age max_age n_participants
## <dbl> <dbl> <dbl> <int>
## 1 12.0 10.1 13.6 59
## [1] "observed first instance for age at TS3 by pubcomp"
pubcomp_avg_age_ts3 <- puberty_sr %>%
filter(PUBcomp == 3) %>%
group_by(tagid) %>%
slice_min(order_by = age, n = 1, with_ties = FALSE) %>%
ungroup() %>%
summarize(average_age = mean(age, na.rm = TRUE),
min_age = min(age, na.rm = TRUE),
max_age = max(age, na.rm = TRUE),
n_participants = n_distinct(tagid))
print(pubcomp_avg_age_ts3) ## # A tibble: 1 × 4
## average_age min_age max_age n_participants
## <dbl> <dbl> <dbl> <int>
## 1 12.3 10.7 13.5 27
## [1] "observed first instance for age at TS3 by PDSS adrenal score"
pdss_adrenal_avg_age_ts3 <- puberty_sr %>%
filter(adrenal_pdss == 3) %>%
group_by(tagid) %>%
slice_min(order_by = age, n = 1, with_ties = FALSE) %>%
ungroup() %>%
summarize(average_age = mean(age, na.rm = TRUE),
min_age = min(age, na.rm = TRUE),
max_age = max(age, na.rm = TRUE),
n_participants = n_distinct(tagid))
print(pdss_adrenal_avg_age_ts3) ## # A tibble: 1 × 4
## average_age min_age max_age n_participants
## <dbl> <dbl> <dbl> <int>
## 1 12.2 10.2 16.1 84
## [1] "observed first instance for age at TS3 by pbip adrenal score"
pbip_adrenal_avg_age_ts3 <- puberty_sr %>%
filter(adrenal_pbip == 3) %>%
group_by(tagid) %>%
slice_min(order_by = age, n = 1, with_ties = FALSE) %>%
ungroup() %>%
summarize(average_age = mean(age, na.rm = TRUE),
min_age = min(age, na.rm = TRUE),
max_age = max(age, na.rm = TRUE),
n_participants = n_distinct(tagid))
print(pbip_adrenal_avg_age_ts3) ## # A tibble: 1 × 4
## average_age min_age max_age n_participants
## <dbl> <dbl> <dbl> <int>
## 1 12.2 10.1 14.8 79
## [1] "observed first instance for age at TS3 by pubcomp adrenal score"
pubcomp_adrenal_avg_age_ts3 <- puberty_sr %>%
filter(ADRENcomp == 3) %>%
group_by(tagid) %>%
slice_min(order_by = age, n = 1, with_ties = FALSE) %>%
ungroup() %>%
summarize(average_age = mean(age, na.rm = TRUE),
min_age = min(age, na.rm = TRUE),
max_age = max(age, na.rm = TRUE),
n_participants = n_distinct(tagid))
print(pubcomp_adrenal_avg_age_ts3) ## # A tibble: 1 × 4
## average_age min_age max_age n_participants
## <dbl> <dbl> <dbl> <int>
## 1 12.1 10.2 14.8 43
## [1] "observed first instance for age at TS3 by PDSS gonadal score"
pdss_gonadal_avg_age_ts3 <- puberty_sr %>%
filter(gonadal_pdss == 3) %>%
group_by(tagid) %>%
slice_min(order_by = age, n = 1, with_ties = FALSE) %>%
ungroup() %>%
summarize(average_age = mean(age, na.rm = TRUE),
min_age = min(age, na.rm = TRUE),
max_age = max(age, na.rm = TRUE),
n_participants = n_distinct(tagid))
print(pdss_gonadal_avg_age_ts3)## # A tibble: 1 × 4
## average_age min_age max_age n_participants
## <dbl> <dbl> <dbl> <int>
## 1 11.8 10.0 15.3 93
## [1] "observed first instance for age at TS3 by pubcomp gonadal score"
pubcomp_gonadal_avg_age_ts3 <- puberty_sr %>%
filter(GONADcomp == 3) %>%
group_by(tagid) %>%
slice_min(order_by = age, n = 1, with_ties = FALSE) %>%
ungroup() %>%
summarize(average_age = mean(age, na.rm = TRUE),
min_age = min(age, na.rm = TRUE),
max_age = max(age, na.rm = TRUE),
n_participants = n_distinct(tagid))
print(pubcomp_gonadal_avg_age_ts3)## # A tibble: 1 × 4
## average_age min_age max_age n_participants
## <dbl> <dbl> <dbl> <int>
## 1 12.0 10.1 14.9 57
## [1] "observed PDSS trajectories"
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 246 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 246 rows containing missing values or values outside the scale range
## (`geom_point()`).
## [1] "observed stage (PBIP) trajectories"
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 249 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 249 rows containing missing values or values outside the scale range
## (`geom_point()`).
## [1] "observed pubcomp trajectories"
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 250 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 250 rows containing missing values or values outside the scale range
## (`geom_point()`).
## [1] "observed pdss adrenal trajectories"
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 235 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 235 rows containing missing values or values outside the scale range
## (`geom_point()`).
## [1] "observed pbip adrenal trajectories"
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 249 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 249 rows containing missing values or values outside the scale range
## (`geom_point()`).
## [1] "observed pubcomp adrenal trajectories"
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 248 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 248 rows containing missing values or values outside the scale range
## (`geom_point()`).
## [1] "observed pdss gonadal trajectories"
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 246 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 246 rows containing missing values or values outside the scale range
## (`geom_point()`).
## [1] "observed pbip gonadal trajectories"
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 245 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 245 rows containing missing values or values outside the scale range
## (`geom_point()`).
## [1] "observed pubcomp gonadal trajectories"
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 249 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 249 rows containing missing values or values outside the scale range
## (`geom_point()`).
## getting a better idea of the data we have
obs_per_participant <- puberty_sr %>%
group_by(tagid) %>%
summarise(observations = n()) %>%
ungroup()
obs_distribution <- obs_per_participant %>%
group_by(observations) %>%
summarise(participants_count = n())
print(obs_distribution)## # A tibble: 9 × 2
## observations participants_count
## <int> <int>
## 1 1 53
## 2 2 17
## 3 3 25
## 4 4 39
## 5 5 42
## 6 6 23
## 7 7 10
## 8 8 12
## 9 9 5
### choose what variables you want to use
set.seed(90025)
print("interpretation of values: lambda is timing (or age at peak growth / TS3), alpha is tempo (or rate of change)")## [1] "interpretation of values: lambda is timing (or age at peak growth / TS3), alpha is tempo (or rate of change)"
print("positive or larger values for lambda = earlier timing, positive or larger values for alpha = slower tempo")## [1] "positive or larger values for lambda = earlier timing, positive or larger values for alpha = slower tempo"
puberty_vars <- c("pdss", "stage", "PUBcomp", "adrenal_pbip", "gonadal_pbip", "GONADcomp")
## model does not converge with PDSS or PUBcomp adrenal scores, or PDSS gonadal score
# logistic model with random effects
logistic_model <- function(age, b0, b1, alpha, lambda) {
b0 + (b1 - b0) * (1 / (1 + exp(-alpha * (age - lambda))))
}
start_vals <- list(fixed = c(alpha = 0.95, lambda = 13.1))
results_list <- list()
for (var in puberty_vars) {
print(paste0("fitting model for: ", var))
puberty_sr_tmp <- puberty_sr %>%
rename(puberty = !!sym(var)) %>%
filter(!is.na(puberty) & !is.na(age) & !is.na(tagid))
fit <- nlme(puberty ~ logistic_model(age, 1, 5, alpha, lambda),
data = puberty_sr_tmp,
fixed = alpha + lambda ~ 1,
random = pdDiag(alpha + lambda ~ 1),
groups = ~ tagid,
start = start_vals,
method = "ML")
fitted_values <- fitted(fit)
individual_estimates <- ranef(fit)
individual_estimates_df <- data.frame(id = rownames(individual_estimates), individual_estimates) %>%
rename("tagid" = "id")
puberty_plot <- cbind(fitted_values, puberty_sr_tmp)
puberty_plot <- left_join(puberty_plot, individual_estimates_df, by = "tagid")
fitted_values_plot <- ggplot(data = puberty_plot, aes(x = age, y = fitted_values, color = tagid)) +
geom_point(aes(y = fitted_values), size = 1, shape = 16) +
geom_line(aes(y = fitted_values), size = 0.5, linetype = "solid") +
labs(
title = paste("predicted values -", var),
x = "age",
y = "predicted pubertal stage",
color = "tagid"
) +
theme_minimal() +
theme(legend.position = "none")
print(paste0("printing fitted values plot for", var))
print(fitted_values_plot)
results_list[[var]] <- list(
fit = fit,
fitted_values = fitted_values,
individual_estimates = individual_estimates,
plot = fitted_values_plot
)
# write.csv(puberty_wide, paste0("puberty_wide_", var, ".csv"))
# write.csv(puberty_plot, paste0("puberty_plot_", var, ".csv"))
}## [1] "fitting model for: pdss"
=======
means <- puberty_sr %>%
filter(!is.na(stage)) %>%
group_by(wave) %>%
summarize(mean = mean(stage))
plot(means, ylim=c(1,5),
xlab = "Time",
ylab = "Pubertal Status",
main = "Plot of means for Pubertal Status",
pch = 16)puberty_sr %>%
ggplot(aes(age, stage, group = tagid)) +
geom_line(alpha = 0.1) +
stat_smooth(aes(group=1),
method = "lm", formula = y ~ x + I(x^2), size = 1.5,
level = 0.95,
color = "blue") +
theme_classic() +
labs(x = "Age", y = "Pubertal Status", title = "Pubertal Status")## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
<<<<<<< HEAD
## [1] "printing fitted values plot forpdss"
## [1] "fitting model for: stage"
## [1] "printing fitted values plot forstage"
## [1] "fitting model for: PUBcomp"
## Warning in nlme.formula(puberty ~ logistic_model(age, 1, 5, alpha, lambda), :
## Iteration 1, LME step: nlminb() did not converge (code = 1). PORT message:
## false convergence (8)
## [1] "printing fitted values plot forPUBcomp"
## [1] "fitting model for: adrenal_pbip"
## [1] "printing fitted values plot foradrenal_pbip"
## [1] "fitting model for: gonadal_pbip"
## [1] "printing fitted values plot forgonadal_pbip"
## [1] "fitting model for: GONADcomp"
## [1] "printing fitted values plot forGONADcomp"
all_estimates <- NULL
for (var in names(results_list)) {
estimates <- results_list[[var]]$individual_estimates
estimates_df <- data.frame(tagid = rownames(estimates), estimates, row.names = NULL)
colnames(estimates_df)[-1] <- paste0(var, "_", colnames(estimates_df)[-1])
if (is.null(all_estimates)) {
all_estimates <- estimates_df
} else {
all_estimates <- full_join(all_estimates, estimates_df, by = "tagid")
}
}
write.csv(all_estimates, paste0(proj_root, "TAG_timing_tempo_04.2025.csv"), row.names = FALSE)##### CHANGE IN STARTING VALUES DID NOT SIGNIFICANTLY IMPACT MODEL FIT OR ESTIMATES
# set.seed(12000)
#
# ### remove a few observations and see if results hold
#
# logistic_model <- function(age, b0, b1, alpha, lambda) {
# b0 + (b1 - b0) * (1 / (1 + exp(-alpha * (age - lambda)))) }
#
#
# fit_logistic_model <- function(data, alpha_start, lambda_start) {
#
# start_vals <- list(fixed = c(alpha = alpha_start, lambda = lambda_start))
#
#
# fit <- tryCatch(
# {
# nlme(
# pdss ~ logistic_model(age, 1, 5, alpha, lambda),
# data = puberty_sr,
# fixed = alpha + lambda ~ 1,
# random = pdDiag(alpha + lambda ~ 1),
# groups = ~ tagid,
# start = start_vals,
# method = "ML"
# )
# },
# error = function(e) NULL
# )
#
# return(fit)
# }
#
#
# ## played around with a few different ranges, the model seems to be sensitive to this (won't converge if the range is too big)
# alpha_range <- seq(0.7, 1.0, by = 0.05)
# lambda_range <- seq(12, 13.5, by = 0.1)
#
#
# results <- data.frame(
# alpha = numeric(),
# lambda = numeric(),
# logLik = numeric(),
# AIC = numeric(),
# BIC = numeric()
# )
#
#
# for (alpha_val in alpha_range) {
# for (lambda_val in lambda_range) {
# fit <- fit_logistic_model(puberty_sr, alpha_val, lambda_val)
#
# if (!is.null(fit)) {
#
# results <- results %>%
# add_row(
# alpha = alpha_val,
# lambda = lambda_val,
# logLik = as.numeric(logLik(fit)),
# AIC = AIC(fit),
# BIC = BIC(fit)
# )
# }
# }
# }
#
# best_model <- results %>% arrange(AIC) %>% slice(1)
#
#
# print(results)
#
# # best combination of alpha and lambda
# print(best_model) ## alpha = 0.95, lambda = 12.1 // now it's giving me a different combo, 0.95 and 13.1... i think its bcz i messed with the range of start values to try and added back in random slope bcz it was converging with the new range (i just made the range a bit smaller)## Warning: Removed 75 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 73 rows containing missing values or values outside the scale range
## (`geom_line()`).
# logistic model with random effects
logistic_model <- function(age, b0, b1, alpha, lambda) {
b0 + (b1 - b0) * (1 / (1 + exp(-alpha * (age - lambda)))) }
# the starting values for the parameters
## we're saying 0.9 is the avg rate of pubertal dev & 13.5 is the avg age of mid-puberty
start_vals <- list(fixed = c(alpha = 0.9, lambda = 13.5))
# removing NAs for now but there could be some sort of fancy way to deal with this
puberty_sr <- puberty_sr %>%
filter(!is.na(stage) & !is.na(age) & !is.na(tagid))
# fit THE nonlinear mixed effects model
## set lower asym at stage 1 and upper asym at stage 5, grouping by tagid
fit <- nlme(stage ~ logistic_model(age, 1, 5, alpha, lambda),
data = puberty_sr,
fixed = alpha + lambda ~ 1,
random = pdDiag(alpha + lambda ~ 1),
groups = ~ tagid,
start = start_vals, method = "ML")
# the fitted values
## predicted stage based on an individual's age
fitted_values <- fitted(fit)
# the individual parameter estimates
## alpha = more pos values --> slower dev, more neg values --> faster dev
## lambda = more pos values --> later mid-pubertal peak, more neg values --> earlier mid-pubertal peak
individual_estimates <- ranef(fit)
# summary
summary(fit)## Nonlinear mixed-effects model fit by maximum likelihood
## Model: stage ~ logistic_model(age, 1, 5, alpha, lambda)
## Data: puberty_sr
## AIC BIC logLik
## 847.9173 869.6014 -418.9586
##
## Random effects:
## Formula: list(alpha ~ 1, lambda ~ 1)
## Level: tagid
## Structure: Diagonal
## alpha lambda Residual
## StdDev: 0.2493891 1.001038 0.344459
##
## Fixed effects: alpha + lambda ~ 1
## Value Std.Error DF t-value p-value
## alpha 0.71284 0.02867077 393 24.86294 0
## lambda 11.91998 0.08925884 393 133.54392 0
## Correlation:
## alpha
## lambda 0.183
##
## Standardized Within-Group Residuals:
## Min Q1 Med Q3 Max
## -2.79755430 -0.51944580 0.05755507 0.45128039 2.58638681
##
## Number of Observations: 565
## Number of Groups: 171
# individual estimates + wide
individual_estimates_df <- data.frame(id = rownames(individual_estimates), individual_estimates) %>%
rename("tagid" = "id")
puberty_wide <- left_join(puberty_wide, individual_estimates_df, by = "tagid")
write.csv(puberty_wide, file = "puberty_wide_plus_alpha_lambda.csv")
puberty_plot <- cbind(fitted_values, puberty_sr)
puberty_plot <- left_join(puberty_plot, individual_estimates_df, by = "tagid")
#write.csv(puberty_plot, file = "puberty_long_plus_alpha_lambda.csv")
fitted_values_plot <- ggplot(data = puberty_plot, aes(x = age, y = fitted_values, color = tagid)) +
geom_point(aes(y = fitted_values), size = 1, shape = 16) +
geom_line(aes(y = fitted_values), size = 0.5, linetype = "solid") +
labs(
title = "predicted values - log model",
x = "age",
y = "predicted pubertal stage",
color = "tagid"
) +
theme_minimal() +
theme(legend.position = "none")
print(fitted_values_plot)